knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(GTFShift)
#> Warning: replacing previous import 'GTFSwizard::read_gtfs' by
#> 'tidytransit::read_gtfs' when loading 'GTFShift'
#> Warning: replacing previous import 'GTFSwizard::write_gtfs' by
#> 'tidytransit::write_gtfs' when loading 'GTFShift'

library(tidytransit)
library(mapview)
library(sf)
library(dplyr)

Introduction

Analyzing public transit feeds is important to understand its territorial coverage and dynamics, both on its spatial and temporal dimensions. GTFShift provides several methods that encapsulate pre-defined methodologies for them. This document explores their applicability with simple examples.

This article uses a GTFS feed from the library GTFS database for Portugal as an example. Refer to the for more details.

# Get GTFS from library GTFS database for Portugal
data <- read.csv(system.file("extdata", "gtfs_sources_pt.csv", package = "GTFShift"))
gtfs_id = "cascais"
gtfs_local <- GTFShift::download(data[data$ID==gtfs_id,]$URL, sprintf("database/transit/%s_gtfs.zip", data[data$ID==gtfs_id,]$ID))

# Load GTFS
gtfs <- tidytransit::read_gtfs(gtfs_local)

Analyse hourly frequency per stop

To analyse frequencies at stops, use GTFShift::get_stop_hour_frequency(), producing, for each, an aggregated counting of bus servicing it per hour.

# Perform frequency analysis
frequencies_stop <- GTFShift::get_stop_frequency_hourly(gtfs)
summary(frequencies_stop)
#>    stop_id               hour         frequency               geometry    
#>  Length:17573       Min.   : 6.00   Min.   : 1.000   POINT        :17573  
#>  Class :character   1st Qu.:10.00   1st Qu.: 2.000   epsg:4326    :    0  
#>  Mode  :character   Median :14.00   Median : 3.000   +proj=long...:    0  
#>                     Mean   :14.19   Mean   : 3.506                        
#>                     3rd Qu.:18.00   3rd Qu.: 4.000                        
#>                     Max.   :23.00   Max.   :37.000

Its returns an sf data.frame that can be displayed using mapview, or stored in GeoPackage format.

# Display map
mapview::mapview(frequencies_stop, zcol="frequency", legend=TRUE, cex=4)

# Store in GeoPackage format
st_write(frequencies_stop, "database/transit/bus_stop_frequency.gpkg", append=FALSE)
#> Deleting layer `bus_stop_frequency' using driver `GPKG'
#> Writing layer `bus_stop_frequency' to data source 
#>   `database/transit/bus_stop_frequency.gpkg' using driver `GPKG'
#> Writing 17573 features with 3 fields and geometry type Point.

Analyse hourly frequency per route

The frequency analysis can also be performed route wise. For this purpose, use GTFShift::get_route_hour_frequency(), returning aggregated results per hour.

The analysis can be performed for each route individually.

frequencies_route <- GTFShift::get_route_frequency_hourly(gtfs)
summary(frequencies_route)
#>    route_id         route_short_name    direction_id     arrival_hour  
#>  Length:1272        Length:1272        Min.   :0.0000   Min.   : 0.00  
#>  Class :character   Class :character   1st Qu.:0.0000   1st Qu.:10.00  
#>  Mode  :character   Mode  :character   Median :0.0000   Median :14.00  
#>                                        Mean   :0.4057   Mean   :13.71  
#>                                        3rd Qu.:1.0000   3rd Qu.:18.00  
#>                                        Max.   :1.0000   Max.   :23.00  
#>    frequency      shape_id                  geometry   
#>  Min.   :1.00   Length:1272        LINESTRING   :1272  
#>  1st Qu.:1.00   Class :character   epsg:4326    :   0  
#>  Median :1.00   Mode  :character   +proj=long...:   0  
#>  Mean   :1.61                                          
#>  3rd Qu.:2.00                                          
#>  Max.   :4.00

The overline parameter allows for an even more aggregated screening of the operation, clustering routes that overlap and converting them into a single route network. This allows for a better visualization of the volumes of frequencies per each segment of the network.

frequencies_route_overline <- GTFShift::get_route_frequency_hourly(gtfs, overline=TRUE)
summary(frequencies_route_overline)
#>    frequency       arrival_hour            geometry    
#>  Min.   : 1.000   Min.   : 0.00   LINESTRING   :68416  
#>  1st Qu.: 1.000   1st Qu.:10.00   epsg:4326    :    0  
#>  Median : 2.000   Median :14.00   +proj=long...:    0  
#>  Mean   : 3.304   Mean   :13.76                        
#>  3rd Qu.: 4.000   3rd Qu.:18.00                        
#>  Max.   :39.000   Max.   :23.00

Aggregated routes for 8 a.m.

mapview::mapview(
  frequencies_route %>% filter(arrival_hour==8 & frequency > 2),
  zcol = "frequency",
  layer.name = "Frequency (hour)"
)

Aggregated routes with overline for 8 a.m.

# above 2 per hour
mapview::mapview(
  frequencies_route_overline %>% filter(arrival_hour==8 & frequency > 2),
  zcol = "frequency",
  layer.name = "Frequency (hour)"
)